package regex;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Objects;

/**
 * Extracts the text of a Weibo post (and, for forwarded posts, the forwarded
 * author and text) from raw page source.
 *
 * <p>The patterns below match literal backslash sequences ({@code \"},
 * {@code \n}, {@code \/}), i.e. they expect the HTML to appear in the page in
 * backslash-escaped form.
 *
 * @fileName: WeiboRegex
 * @author: Han
 * @date: 2018/6/1 14:33
 * @description: Parses the content of a Weibo page
 */
public class WeiboRegex {

    /** Everything between the opening of the {@code WB_detail} div and the last {@code WB_like} div. */
    private static final String DETAIL_BLOCK_REGEX =
            "(?<=<div class=\\\\\"WB_detail)"
                    + "[\\s\\S]*"
                    + "(?=<div class=\\\\\"WB_like\\\\\")";

    /**
     * Text of the post itself: starts after the literal {@code \n} that follows the
     * {@code WB_text W_f14} div opening and runs up to the first closing {@code <\/div>}.
     */
    private static final String POST_TEXT_REGEX =
            "(?<=<div class=\\\\\"WB_text W_f14\\\\\"[\\s\\S]{1,100}?[^\\\\n]\\\\n)"
                    + "[\\s\\S]*?"
                    + "(?=<\\\\/div>)";

    /** Substring whose presence in the detail block marks the post as a forward of another post. */
    private static final String FORWARD_MARKER = "<div class=\\\"WB_expand S_bg1";

    /** Body of the forwarded section: from the {@code WB_expand S_bg1} div to the {@code WB_func clearfix} div. */
    private static final String FORWARD_BLOCK_REGEX =
            "(?<=<div class=\\\\\"WB_expand S_bg1\\\\\"[\\s\\S]{1,100}?[^>\\\\n]>\\\\n)"
                    + "[\\s\\S]*"
                    + "(?=<div class=\\\\\"WB_func clearfix\\\\\">)";

    /** Value of the {@code nick-name} attribute, i.e. the nickname of the forwarded author. */
    private static final String NICKNAME_REGEX =
            "(?<=nick-name=\\\\\")[\\s\\S]*?[^\\\\\"](?=\\\\\")";

    /** Text of the forwarded post: content of the {@code feed_list_reason} div up to the first closing {@code <\/div>}. */
    private static final String FORWARD_TEXT_REGEX =
            "(?<=<div class=\\\\\"WB_text\\\\\" node-type=\\\\\"feed_list_reason\\\\\">\\\\n)"
                    + "[\\s\\S]*?"
                    + "(?=<\\\\/div>)";

    /**
     * Parses the content of a Weibo post.
     *
     * <p>The result is the post text, followed by {@code //nickname:forwardedText}
     * when the post forwards another post, passed through
     * {@code Regex.deleteRedundantChar} before being returned.
     *
     * <p>NOTE(review): {@code Regex.matchString} is assumed to return the first
     * match of the pattern; whether it returns {@code null} or an empty string when
     * nothing matches is not visible here, so {@code null} results are treated as
     * empty text.
     *
     * @param weiboPage page source
     * @return the parsed content
     */
    public static String parseWeibo(String weiboPage){

        StringBuilder content = new StringBuilder();

        // Extract the main content block.
        String block = Regex.matchString(weiboPage, DETAIL_BLOCK_REGEX);

        // The post text is looked up in the whole page, not only in the block.
        String review = Regex.matchString(weiboPage, POST_TEXT_REGEX);
        // Objects.toString(x, "") keeps the literal text "null" out of the result.
        content.append(Objects.toString(review, ""));

        // The post forwards somebody else's post.
        if (block != null && block.contains(FORWARD_MARKER)) {
            // Forwarded section.
            String transmit = Objects.toString(Regex.matchString(block, FORWARD_BLOCK_REGEX), "");
            // Nickname of the forwarded author.
            String nickname = Regex.matchString(transmit, NICKNAME_REGEX);
            content.append("//").append(Objects.toString(nickname, ""));
            // Forwarded text.
            String transmitComment = Regex.matchString(transmit, FORWARD_TEXT_REGEX);
            content.append(":").append(Objects.toString(transmitComment, ""));
        }

        return Regex.deleteRedundantChar(content.toString());
    }
}
